// NationscapeEthnicity
* Open the log and load the data.
* A log left open by an earlier (possibly aborted) run would make -log using- fail,
* so close it first; -capture- ignores the error raised when no log is open.
capture log close
log using NationscapeEthnicity.log, replace

* -clear- allows the script to be re-run while a (modified) dataset is already in memory.
// Data source: Tausanovitch, Chris and Lynn Vavreck. 2020. Democracy Fund + UCLA Nationscape, October 10-17, 2019 (version 20200814). Retrieved from [URL].  Date accessed: March 09, 2025.
use "C:\Users\sbstjp\OneDrive - Cardiff University\Nationscapedataset.dta", clear

* Keep only the variables referenced in the rest of this script
keep gender age education household_income race_ethnicity weight ideo5 statements_gender_identity reparations group_favorability_blm group_favorability_undocumented start_date

// Social justice scale
* Recode unwanted response codes to system missing (observations are kept, not dropped):
*   values above 4 for the gender-identity and BLM items,
*   values above 2 for reparations,
*   the value 999 for the undocumented-immigrants item.
foreach item of varlist statements_gender_identity group_favorability_blm {
    replace `item' = . if `item' > 4
}
replace reparations = . if reparations > 2
replace group_favorability_undocumented = . if group_favorability_undocumented == 999

* Shorten the item names in a single group rename
rename (group_favorability_blm group_favorability_undocumented statements_gender_identity) (blm undocumented genderidentity)

* Reverse-code three items so that social justice is coded high:
* reversed value = (largest observed value + 1) - original value.
* The reversed copies are named r<item>; missing values stay missing.
foreach item in blm reparations undocumented {
    quietly summarize `item', meanonly
    generate r`item' = (r(max) + 1) - `item'
}

* Min-max rescale every scale item to run from 1 to 2, so that a valid
* response can never take the value 0. The rescaled copies are named s<item>.
foreach item in rblm rreparations rundocumented genderidentity {
    summarize `item'
    generate s`item' = (`item' - r(min)) / (r(max) - r(min)) + 1
}

* Build the social justice scale as the mean of the items each respondent answered.
* The egen row functions skip missing items themselves, so the rescaled items
* are left untouched (missing responses are NOT overwritten with a 0 sentinel).
local sjv_items srblm srreparations srundocumented sgenderidentity

* Sum of the answered items (rowtotal treats a missing item as 0)
egen total_scoreSJV = rowtotal(`sjv_items')

* Number of answered items per respondent
egen count_nonzeroSJV = rownonmiss(`sjv_items')

* Mean of the answered items; missing when a respondent answered none of them,
* which also rules out any division by zero
egen SocJusValues = rowmean(`sjv_items')

// Demographic variables
* Flip the gender coding: new value = (largest observed gender code + 1) - old value.
* NOTE(review): the result keeps the original code range rather than 0/1;
* confirm against the codebook which gender ends up coded high.
quietly summarize gender, meanonly
generate FemaleGender = r(max) + 1 - gender

* Indicator variables. Each is 1 for the target codes, 0 for the other listed
* codes, and missing for anything else (including a missing source value).

* Graduate: education codes 8-11 versus 1-7
generate Graduate = cond(inrange(education, 8, 11), 1, cond(inrange(education, 1, 7), 0, .))

* White: race_ethnicity code 1 versus 2-15
generate White = cond(race_ethnicity == 1, 1, cond(inrange(race_ethnicity, 2, 15), 0, .))

* SouthAsian: race_ethnicity code 4 versus 1-3 and 5-15
generate SouthAsian = cond(race_ethnicity == 4, 1, cond(inrange(race_ethnicity, 1, 3) | inrange(race_ethnicity, 5, 15), 0, .))

* EastAsian: race_ethnicity codes 5-9 versus 1-4 and 10-15
generate EastAsian = cond(inrange(race_ethnicity, 5, 9), 1, cond(inrange(race_ethnicity, 1, 4) | inrange(race_ethnicity, 10, 15), 0, .))

* Black: race_ethnicity code 2 versus 1 and 3-15
generate Black = cond(race_ethnicity == 2, 1, cond(race_ethnicity == 1 | inrange(race_ethnicity, 3, 15), 0, .))

// Standardize
* Create z-scored copies of income and age with egen std().
* Each pair is "<new variable> <source variable>".
foreach pair in "Income household_income" "Age age" {
    gettoken zname rawname : pair
    egen `zname' = std(`rawname')
}

// Regressions
* Analysis sample: interviews whose start_date falls in the 4th quarter of 2020.
* Assumes start_date is stored as a Stata datetime (%tc), as dofc() requires - TODO confirm.
* NOTE(review): the data citation at the top of this file names the October 10-17, 2019
* wave; confirm the loaded file actually contains Q4 2020 interviews, otherwise this
* restriction selects no observations.
local q4_2020 quarter(dofc(start_date)) == 4 & year(dofc(start_date)) == 2020

* Drop estimates stored by eststo earlier in the session, so that esttab
* tabulates only the four models fitted below.
eststo clear

* Same specification four times, swapping in one ethnicity indicator at a time.
* Survey weights are applied as probability weights with robust standard errors.
foreach group in White Black SouthAsian EastAsian {
    regress SocJusValues Age FemaleGender Graduate Income `group' if `q4_2020' [pweight=weight], robust
    eststo
}

* Side-by-side table of the stored models
esttab

// Correlations
* Weighted pairwise correlations (with significance levels) between the scale and
* the ethnicity indicators, for interviews started between 1 Oct and 31 Dec 2020,
* i.e. the 4th quarter of 2020.
local corr_sample inrange(dofc(start_date), td(01oct2020), td(31dec2020))
pwcorr SocJusValues White Black SouthAsian EastAsian if `corr_sample' [aweight=weight], sig

* Finish the session log
log close